#################################################################################
#                 Descriptive Statistical Analysis
################################################################################

### Read the evaluation export and keep only the rows used in the analysis:
### completed evaluations whose trainee PGY is not one of the excluded codes.
df <- read.csv("eval_sun_smith.csv") %>%
  filter(
    status == "COMPLETE",
    # A missing PGY never passed the original `!=` comparisons, so it is
    # dropped explicitly here before the set-membership test.
    !is.na(traineePGY),
    !(traineePGY %in% c("6", "7", "8", "9", "11", "NULL"))
  )

# Range of evaluation dates: parse `date` with the dd/mm/YYYY format, rewrite
# as YYYY-mm-dd text, and print the smallest and largest of those strings.
df$date %>%
  as.Date(format = "%d/%m/%Y") %>%
  format("%Y-%m-%d") %>%
  range()

#### Number of trainees by PGY
# Distinct subject IDs on rows whose subject is a trainee; this is the
# denominator for the percentage column below.
traineeT <- with(df, length(unique(subjectID[subjectRole == "Trainee"])))

# One row per PGY level: distinct trainees, the overall trainee total, and
# that level's share of the total.
traineePGY <- df %>%
  filter(subjectRole == "Trainee") %>%
  group_by(traineePGY) %>%
  dplyr::summarise(count = n_distinct(subjectID), .groups = "drop") %>%
  mutate(
    Total = traineeT,
    `%` = formattable::percent(count / Total)
  )

### Number of trainees by PGY and gender
# Rows with gender code 0 or 9 are excluded; code 1 is relabelled "Male" and
# code 2 "Female" before counting distinct trainees per PGY/gender pair.
traineePGYGender <- df %>%
  filter(
    subjectRole == "Trainee",
    subjectGender != 0,
    subjectGender != 9
  ) %>%
  mutate(
    subjectGender = dplyr::recode(subjectGender, "1" = "Male", "2" = "Female")
  ) %>%
  group_by(traineePGY, subjectGender) %>%
  dplyr::summarise(count = n_distinct(subjectID), .groups = "drop")

#### Number of trainees by gender
# One-row summary with Male / Female counts side by side plus their share of
# traineeT. Percentages are relative to traineeT, not to the gender-filtered
# subset, so they need not sum to 100%.
# NOTE(review): the object name looks like a typo for `traineeGender`; it is
# kept unchanged in case later code refers to it.
tranineeGender <- df %>%
  filter(
    subjectRole == "Trainee",
    subjectGender != 0,
    subjectGender != 9
  ) %>%
  mutate(
    subjectGender = dplyr::recode(subjectGender, "1" = "Male", "2" = "Female")
  ) %>%
  group_by(subjectGender) %>%
  dplyr::summarise(count = n_distinct(subjectID), .groups = "drop") %>%
  spread(subjectGender, count) %>%
  mutate(
    Total = traineeT,
    `Male%` = formattable::percent(Male / Total),
    `Female%` = formattable::percent(Female / Total)
  )
 
#### Number of faculty by trainee PGY
# Distinct rater IDs on rows whose rater is an attending; denominator for the
# percentage column below.
facultyT <- with(df, length(unique(raterID[raterRole == "Attending"])))

# Distinct attending raters per trainee PGY level. A rater appearing under
# several PGY levels is counted once in each of them.
facultyPGY <- df %>%
  filter(raterRole == "Attending") %>%
  group_by(traineePGY) %>%
  dplyr::summarise(count = n_distinct(raterID), .groups = "drop") %>%
  mutate(
    Total = facultyT,
    `%` = formattable::percent(count / Total)
  )

#### Number of faculty by gender
# One-row summary with Male / Female attending counts and their share of
# facultyT. Raters with gender code 0 or 9 are left out of the counts but
# remain in the facultyT denominator.
# NOTE(review): the object name looks like a typo for `facultyGender`; it is
# kept unchanged in case later code refers to it.
gacultyGender <- df %>%
  filter(
    raterRole == "Attending",
    raterGender != 0,
    raterGender != 9
  ) %>%
  mutate(
    raterGender = dplyr::recode(raterGender, "1" = "Male", "2" = "Female")
  ) %>%
  group_by(raterGender) %>%
  dplyr::summarise(count = n_distinct(raterID), .groups = "drop") %>%
  spread(raterGender, count) %>%
  mutate(
    Total = facultyT,
    `Male%` = formattable::percent(Male / Total),
    `Female%` = formattable::percent(Female / Total)
  )


#### Number of distinct programs in the filtered data
df$programID %>%
  unique() %>%
  length()
#### Number of evaluations rated by faculty (attendings), by trainee PGY
evaluationPGY_faculty <- df %>%
  filter(raterRole == "Attending") %>%
  group_by(traineePGY) %>%
  dplyr::summarise(
    count = n_distinct(evaluationID),
    .groups = "drop"
  )

#### Overall number of distinct evaluations and distinct procedures
df$evaluationID %>%
  unique() %>%
  length()
df$procID %>%
  unique() %>%
  length()

#### Number of evaluations rated by residents (trainees), by trainee PGY
evaluationPGY_trainee <- df %>%
  filter(raterRole == "Trainee") %>%
  group_by(traineePGY) %>%
  dplyr::summarise(
    count = n_distinct(evaluationID),
    .groups = "drop"
  )

#### Number of distinct procedures by trainee PGY (all rater roles)
procedurePGY <- df %>%
  group_by(traineePGY) %>%
  dplyr::summarise(
    count = n_distinct(procID),
    .groups = "drop"
  )


#### Mean, SD, median and quartiles of autonomy (supervision) by PGY
# Only the first row of each evaluation is used, and only when the rater on
# that row is an attending. `supervision` codes 1-4 are mapped to the numbers
# 1-4 before summarising.
autonomy <- df %>%
  mutate(
    supervision_r = dplyr::recode(
      supervision,
      "1" = 1, "2" = 2, "3" = 3, "4" = 4
    )
  ) %>%
  group_by(evaluationID) %>%
  filter(row_number() == 1) %>%
  filter(raterRole == "Attending") %>%
  group_by(traineePGY) %>%
  dplyr::summarise(
    mean = mean(supervision_r),
    sd = sd(supervision_r),
    median = median(supervision_r),
    quant25 = quantile(supervision_r, probs = 0.25),
    quant50 = quantile(supervision_r, probs = 0.5),
    quant75 = quantile(supervision_r, probs = 0.75),
    # The option is `.groups` (leading dot). Written as `groups = 'drop'`,
    # summarise() adds a literal column named `groups` instead.
    .groups = "drop"
  )


#### Mean, SD, median and quartiles of performance by PGY
# Rows with performance "NULL" or 6 are removed, then the 1-5 codes are
# reversed (1 -> 5, ..., 5 -> 1). Only the first remaining row of each
# evaluation is used, and only when the rater on that row is an attending.
performance <- df %>%
  filter(performance != "NULL") %>%
  filter(performance != 6) %>%
  mutate(
    performance_r = dplyr::recode(
      performance,
      "1" = 5, "2" = 4, "3" = 3, "4" = 2, "5" = 1
    )
  ) %>%
  group_by(evaluationID) %>%
  filter(row_number() == 1) %>%
  filter(raterRole == "Attending") %>%
  group_by(traineePGY) %>%
  dplyr::summarise(
    mean = mean(performance_r),
    sd = sd(performance_r),
    median = median(performance_r),
    quant25 = quantile(performance_r, probs = 0.25),
    quant50 = quantile(performance_r, probs = 0.5),
    quant75 = quantile(performance_r, probs = 0.75),
    # Explicit, like the other summaries in this script: return an ungrouped
    # table.
    .groups = "drop"
  )


### Number of "not autonomous" assessments by PGY
# Attending-rated evaluations whose supervision code is 1 or 2.
autonomy_not <- df %>%
  filter(
    raterRole == "Attending",
    supervision %in% c("1", "2")
  ) %>%
  group_by(traineePGY) %>%
  dplyr::summarise(
    count = n_distinct(evaluationID),
    .groups = "drop"
  )



### Number of "not practice ready" assessments by PGY
# Attending-rated evaluations whose performance code is 1, 2 or 3.
performance_not <- df %>%
  filter(
    raterRole == "Attending",
    performance %in% c("1", "2", "3")
  ) %>%
  group_by(traineePGY) %>%
  dplyr::summarise(
    count = n_distinct(evaluationID),
    .groups = "drop"
  )


### Trainees who were assessed (by attendings) on only a few procedures
# All PGY years: trainees with exactly one distinct procedure
df %>%
  filter(raterRole == "Attending") %>%
  group_by(subjectID) %>%
  dplyr::summarise(
    count = n_distinct(procID),
    .groups = "drop"
  ) %>%
  arrange(desc(count)) %>%
  filter(count == 1)

# All PGY years: trainees with fewer than 10 distinct procedures, listed from
# the highest count down
df %>%
  filter(raterRole == "Attending") %>%
  group_by(subjectID) %>%
  dplyr::summarise(
    count = n_distinct(procID),
    .groups = "drop"
  ) %>%
  arrange(desc(count)) %>%
  filter(count < 10)

# PGY 5 only: trainees with exactly one distinct attending-rated procedure
df %>%
  filter(
    raterRole == "Attending",
    traineePGY == 5
  ) %>%
  group_by(subjectID) %>%
  dplyr::summarise(
    count = n_distinct(procID),
    .groups = "drop"
  ) %>%
  arrange(desc(count)) %>%
  filter(count == 1)

# PGY 5 only: trainees with fewer than 17 distinct attending-rated procedures,
# listed from the highest count down
df %>%
  filter(
    raterRole == "Attending",
    traineePGY == 5
  ) %>%
  group_by(subjectID) %>%
  dplyr::summarise(
    count = n_distinct(procID),
    .groups = "drop"
  ) %>%
  arrange(desc(count)) %>%
  filter(count < 17)










